The heatmap of the
data
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
Correlation
Matrix of the Data
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
The
decorrelation
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> X58491_s_at X44816_s_at AFFX_BioB_M_at X54789_at X43849_s_at X52300_f_at
#> X41880_at X41881_at X41882_at X41883_at X41884_at X41885_at
#> 0.7446117 0.4186212 0.2634707 0.3645800 0.6194929 0.1778922
#>
#> Included: 12620 , Uni p: 1.18859e-05 , Base Size: 37 , Rcrit: 0.3697266
#>
#>
1 <R=0.988,thr=0.950>, Top: 24< 1 >[Fa= 24 ]( 24 , 27 , 0 ),<|><>Tot Used: 51 , Added: 27 , Zero Std: 0 , Max Cor: 0.949
#>
2 <R=0.949,thr=0.900>, Top: 40< 1 >[Fa= 59 ]( 40 , 45 , 24 ),<|><>Tot Used: 130 , Added: 45 , Zero Std: 0 , Max Cor: 0.915
#>
3 <R=0.915,thr=0.900>, Top: 2< 1 >[Fa= 61 ]( 2 , 2 , 59 ),<|><>Tot Used: 134 , Added: 2 , Zero Std: 0 , Max Cor: 0.908
#>
4 <R=0.908,thr=0.900>, Top: 1< 1 >[Fa= 62 ]( 1 , 1 , 61 ),<|><>Tot Used: 136 , Added: 1 , Zero Std: 0 , Max Cor: 0.899
#>
5 <R=0.899,thr=0.800>, Top: 135< 1 >.[Fa= 186 ]( 134 , 208 , 62 ),<|><>Tot Used: 460 , Added: 208 , Zero Std: 0 , Max Cor: 0.890
#>
6 <R=0.890,thr=0.800>, Top: 23< 2 >[Fa= 207 ]( 21 , 28 , 186 ),<|><>Tot Used: 509 , Added: 28 , Zero Std: 0 , Max Cor: 0.816
#>
7 <R=0.816,thr=0.800>, Top: 2< 1 >[Fa= 209 ]( 2 , 2 , 207 ),<|><>Tot Used: 513 , Added: 2 , Zero Std: 0 , Max Cor: 0.800
#>
8 <R=0.800,thr=0.800>
#>
[ 8 ], 0.7988513 Decor Dimension: 513 Nused: 513 . Cor to Base: 300 , ABase: 12620 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
783
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
719
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
4.71
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
4.29
varratio <- attr(DEdataframe,"VarRatio")
pander::pander(tail(varratio))
| 0.038 |
0.0368 |
0.027 |
0.0239 |
0.0238 |
0.023 |
The decorrelation
matrix
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPLTM <- attr(DEdataframe,"UPLTM")
gplots::heatmap.2(1.0*(abs(UPLTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
Formulas
Network
Displaying the features associations
par(op)
clustable <- c("To many variables")
transform <- attr(DEdataframe,"UPLTM") != 0
tnames <- colnames(transform)
colnames(transform) <- str_remove_all(colnames(transform),"La_")
transform <- abs(transform*cor(dataframe[,rownames(transform)])) # The weights are proportional to the observed correlation
fscore <- attr(DEdataframe,"fscore")
VertexSize <- fscore # The size depends on the variable independence relevance (fscore)
names(VertexSize) <- str_remove_all(names(VertexSize),"La_")
VertexSize <- 10*(VertexSize-min(VertexSize))/(max(VertexSize)-min(VertexSize)) # Normalization
VertexSize <- VertexSize[rownames(transform)]
rsum <- apply(1*(transform !=0),1,sum) + 0.01*VertexSize + 0.001*varratio[tnames]
csum <- apply(1*(transform !=0),2,sum) + 0.01*VertexSize + 0.001*varratio[tnames]
ntop <- min(10,length(rsum))
topfeatures <- unique(c(names(rsum[order(-rsum)])[1:ntop],names(csum[order(-csum)])[1:ntop]))
rtrans <- transform[topfeatures,]
csum <- (apply(1*(rtrans !=0),2,sum) > 1*(colnames(rtrans) %in% topfeatures))
rtrans <- rtrans[,csum]
topfeatures <- unique(c(topfeatures,colnames(rtrans)))
print(ncol(transform))
[1] 513
transform <- transform[topfeatures,topfeatures]
print(ncol(transform))
[1] 102
if (ncol(transform)>100)
{
csum <- apply(1*(transform !=0),1,sum)
csum <- csum[csum > 1]
csum <- csum + 0.01*VertexSize[names(csum)]
csum <- csum[order(-csum)]
tpsum <- min(20,length(csum))
trsum <- rownames(transform)[rownames(transform) %in% names(csum[1:tpsum])]
rtrans <- transform[trsum,]
topfeatures <- unique(c(rownames(rtrans),colnames(rtrans)))
transform <- transform[topfeatures,topfeatures]
if (nrow(transform) > 150)
{
csum <- apply(1*(rtrans != 0 ),2,sum)
csum <- csum + 0.01*VertexSize[names(csum)]
csum <- csum[order(-csum)]
tpsum <- min(130,length(csum))
csum <- rownames(transform)[rownames(transform) %in% names(csum[1:tpsum])]
csum <- unique(c(trsum,csum))
transform <- transform[csum,csum]
}
print(ncol(transform))
}
[1] 102
if (ncol(transform) < 150)
{
gplots::heatmap.2(transform,
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Red Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
VertexSize <- VertexSize[colnames(transform)]
gr <- graph_from_adjacency_matrix(transform,mode = "directed",diag = FALSE,weighted=TRUE)
gr$layout <- layout_with_fr
fc <- cluster_optimal(gr)
plot(fc, gr,
edge.width = 2*E(gr)$weight,
vertex.size=VertexSize,
edge.arrow.size=0.5,
edge.arrow.width=0.5,
vertex.label.cex=(0.15+0.05*VertexSize),
vertex.label.dist=0.5 + 0.05*VertexSize,
main="Top Feature Association")
varratios <- varratio
fscores <- fscore
names(varratios) <- str_remove_all(names(varratios),"La_")
names(fscores) <- str_remove_all(names(fscores),"La_")
dc <- getLatentCoefficients(DEdataframe)
theCharformulas <- attr(dc,"LatentCharFormulas")
clustable <- as.data.frame(cbind(Variable=fc$names,
Formula=as.character(theCharformulas[paste("La_",fc$names,sep="")]),
Class=fc$membership,
ResidualVariance=round(varratios[fc$names],3),
Fscore=round(fscores[fc$names],3)
)
)
rownames(clustable) <- str_replace_all(rownames(clustable),"__","_")
clustable$Variable <- NULL
clustable$Class <- as.integer(clustable$Class)
clustable$ResidualVariance <- as.numeric(clustable$ResidualVariance)
clustable$Fscore <- as.numeric(clustable$Fscore)
clustable <- clustable[order(-clustable$Fscore),]
clustable <- clustable[order(clustable$Class),]
clustable <- clustable[clustable$Fscore >= -1,]
topv <- min(50,nrow(clustable))
clustable <- clustable[1:topv,]
}


pander::pander(clustable)
| X48039_at |
NA |
1 |
1.000 |
40 |
| X43355_s_at |
+ X43355_s_at - (0.470)X48039_at |
1 |
0.285 |
-1 |
| X43395_s_at |
+ X43395_s_at - (0.273)X48039_at |
1 |
0.344 |
-1 |
| X43817_f_at |
+ X43817_f_at - (0.618)X48039_at |
1 |
0.294 |
-1 |
| X44088_at |
+ X44088_at - (0.449)X48039_at |
1 |
0.256 |
-1 |
| X44119_at |
+ X44119_at - (0.413)X48039_at |
1 |
0.283 |
-1 |
| X44746_at |
+ X44746_at - (0.219)X48039_at |
1 |
0.355 |
-1 |
| X45178_at |
+ X45178_at - (0.218)X48039_at |
1 |
0.359 |
-1 |
| X45199_at |
+ X45199_at - (0.535)X48039_at |
1 |
0.345 |
-1 |
| X45680_at |
+ X45680_at - (0.327)X48039_at |
1 |
0.281 |
-1 |
| X45777_at |
+ X45777_at - (0.367)X48039_at |
1 |
0.300 |
-1 |
| X46276_at |
+ X46276_at - (0.879)X48039_at |
1 |
0.234 |
-1 |
| X46314_at |
+ X46314_at - (0.450)X48039_at |
1 |
0.300 |
-1 |
| X46476_at |
+ X46476_at - (0.655)X48039_at |
1 |
0.239 |
-1 |
| X48927_at |
- (0.383)X48039_at + X48927_at |
1 |
0.332 |
-1 |
| X50001_at |
- (0.519)X48039_at + X50001_at |
1 |
0.262 |
-1 |
| X50361_at |
- (0.929)X48039_at + X50361_at |
1 |
0.077 |
-1 |
| X51939_at |
- (1.007)X48039_at + X51939_at |
1 |
0.151 |
-1 |
| X52036_at |
- (0.293)X48039_at + X52036_at |
1 |
0.316 |
-1 |
| X52140_at |
- (0.756)X48039_at + X52140_at |
1 |
0.164 |
-1 |
| X52856_at |
- (0.322)X48039_at + X52856_at |
1 |
0.303 |
-1 |
| X52946_at |
- (0.518)X48039_at + X52946_at |
1 |
0.251 |
-1 |
| X53785_at |
- (0.383)X48039_at + X53785_at |
1 |
0.265 |
-1 |
| X53796_at |
- (0.617)X48039_at + X53796_at |
1 |
0.288 |
-1 |
| X54063_at |
- (0.514)X48039_at + X54063_at |
1 |
0.233 |
-1 |
| X54668_at |
- (0.628)X48039_at + X54668_at |
1 |
0.308 |
-1 |
| X54713_at |
- (0.474)X48039_at + X54713_at |
1 |
0.262 |
-1 |
| X54992_at |
- (0.345)X48039_at + X54992_at |
1 |
0.302 |
-1 |
| X55077_at |
- (0.356)X48039_at + X55077_at |
1 |
0.308 |
-1 |
| X56192_at |
- (0.501)X48039_at + X56192_at |
1 |
0.348 |
-1 |
| X56474_at |
- (0.586)X48039_at + X56474_at |
1 |
0.234 |
-1 |
| X57194_at |
- (0.380)X48039_at + X57194_at |
1 |
0.249 |
-1 |
| X57709_at |
- (0.205)X48039_at + X57709_at |
1 |
0.314 |
-1 |
| X58917_at |
- (0.179)X48039_at + X58917_at |
1 |
0.338 |
-1 |
| X47281_r_at |
NA |
2 |
1.000 |
7 |
| X44476_r_at |
+ X44476_r_at - (0.781)X47281_r_at |
2 |
0.340 |
-1 |
| X45020_r_at |
+ X45020_r_at - (0.533)X47281_r_at |
2 |
0.316 |
-1 |
| X46940_r_at |
+ X46940_r_at - (0.527)X47281_r_at |
2 |
0.357 |
-1 |
| X47428_r_at |
- (0.514)X47281_r_at + X47428_r_at |
2 |
0.344 |
-1 |
| X47708_r_at |
- (0.663)X47281_r_at + X47708_r_at |
2 |
0.246 |
-1 |
| X48326_r_at |
- (0.584)X47281_r_at + X48326_r_at |
2 |
0.337 |
-1 |
| X54192_r_at |
- (0.600)X47281_r_at + X54192_r_at |
2 |
0.270 |
-1 |
| X44986_s_at |
NA |
3 |
1.000 |
6 |
| X44841_at |
+ X44841_at - (0.888)X44986_s_at |
3 |
0.110 |
-1 |
| X45628_at |
- (0.728)X44986_s_at + X45628_at |
3 |
0.337 |
-1 |
| X46698_at |
- (0.907)X44986_s_at + X46698_at |
3 |
0.336 |
-1 |
| X53793_at |
- (1.184)X44986_s_at + X53793_at |
3 |
0.286 |
-1 |
| X54865_at |
- (1.250)X44986_s_at + X54865_at |
3 |
0.359 |
-1 |
| X55722_at |
- (1.070)X44986_s_at + X55722_at |
3 |
0.310 |
-1 |
| X47112_at |
NA |
4 |
1.000 |
6 |
par(op)
U-MAP Visualization
of features
The UMAP on Raw
Data
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
topvars <- univariate_BinEnsemble(dataframe,outcome)
lso <- LASSO_MIN(formula(paste(outcome,"~.")),dataframe,family="binomial")
topvars <- unique(c(names(topvars),lso$selectedfeatures))
pander::pander(head(topvars))
X56456_at, X52119_at, X45793_at,
X46410_at, X54033_at and X54063_at
# names(topvars)
#if (nrow(dataframe) < 1000)
#{
datasetframe.umap = umap(scale(dataframe[1:numsub,topvars]),n_components=2)
# datasetframe.umap = umap(dataframe[1:numsub,varlist],n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])

#}
The decorralted
UMAP
varlistcV <- names(varratio[varratio >= 0.01])
topvars <- univariate_BinEnsemble(DEdataframe[,varlistcV],outcome)
lso <- LASSO_MIN(formula(paste(outcome,"~.")),DEdataframe[,varlistcV],family="binomial")
topvars <- unique(c(names(topvars),lso$selectedfeatures))
pander::pander(head(topvars))
X56456_at, X52119_at, X45793_at,
X46410_at, X54033_at and X56471_at
varlistcV <- varlistcV[varlistcV != outcome]
# DEdataframe[,outcome] <- as.numeric(DEdataframe[,outcome])
#if (nrow(dataframe) < 1000)
#{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,topvars]),n_components=2)
# datasetframe.umap = umap(DEdataframe[1:numsub,varlistcV],n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After ILAA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])

#}
Univariate
Analysis
Univariate
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
100 : X41979_r_at 200 : X42079_at 300 : X42179_at 400 : X42279_at 500
: X42379_at
600 : X42479_r_at 700 : X42579_at 800 : X42679_at 900 : X42779_at 1000 :
X42879_at
1100 : X42979_at 1200 : X43079_at 1300 : X43179_at 1400 : X43279_at 1500
: X43379_f_at
1600 : X43479_at 1700 : X43579_at 1800 : X43679_at 1900 : X43779_at 2000
: X43879_at
2100 : X43979_r_at 2200 : X44079_at 2300 : X44179_at 2400 : X44279_at
2500 : X44379_at
2600 : X44479_at 2700 : X44579_at 2800 : X44679_at 2900 : X44779_at 3000
: X44879_at
3100 : X44979_at 3200 : X45079_at 3300 : X45179_at 3400 : X45279_at 3500
: X45379_at
3600 : X45479_at 3700 : X45579_at 3800 : X45679_at 3900 : X45779_at 4000
: X45879_at
4100 : X45979_at 4200 : X46079_at 4300 : X46179_f_at 4400 : X46279_at
4500 : X46379_i_at
4600 : X46479_at 4700 : X46579_at 4800 : X46679_at 4900 : X46779_r_at
5000 : X46879_at
5100 : X46979_at 5200 : X47079_at 5300 : X47179_r_at 5400 : X47279_r_at
5500 : X47379_at
5600 : X47479_at 5700 : X47579_at 5800 : X47679_at 5900 : X47779_at 6000
: X47879_at
6100 : X47979_at 6200 : X48079_at 6300 : X48179_at 6400 : X48279_at 6500
: X48379_at
6600 : X48479_at 6700 : X48579_r_at 6800 : X48790_s_at 6900 : X48981_at
7000 : X49161_at
7100 : X49345_at 7200 : X49549_at 7300 : X49727_at 7400 : X49908_at 7500
: X50100_at
7600 : X50266_at 7700 : X50463_at 7800 : X50660_r_at 7900 : X50860_at
8000 : X51020_at
8100 : X51179_at 8200 : X51365_at 8300 : X51567_at 8400 : X51747_at 8500
: X51927_at
8600 : X52116_at 8700 : X52327_s_at 8800 : X52550_s_at 8900 : X52760_at
9000 : X52958_at
9100 : X53176_at 9200 : X53374_at 9300 : X53575_at 9400 : X53781_at 9500
: X53965_at
9600 : X54171_s_at 9700 : X54367_at 9800 : X54557_at 9900 : X54755_at
10000 : X54962_f_at
10100 : X55157_at 10200 : X55359_at 10300 : X55532_at 10400 : X55709_at
10500 : X55902_at
10600 : X56086_at 10700 : X56263_at 10800 : X56441_at 10900 : X56637_at
11000 : X56832_at
11100 : X57023_at 11200 : X57205_at 11300 : X57382_at 11400 : X57590_at
11500 : X57797_at
11600 : X57996_at 11700 : X58195_at 11800 : X58394_g_at 11900 :
X58617_at 12000 : X58812_at
12100 : X58984_at 12200 : X59194_at 12300 : X59378_at 12400 : X59555_at
12500 : X59729_at
12600 : AFFX_LysX_M_at
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
100 : X41979_r_at 200 : X42079_at 300 : X42179_at 400 : X42279_at 500
: X42379_at
600 : X42479_r_at 700 : X42579_at 800 : X42679_at 900 : X42779_at 1000 :
X42879_at
1100 : X42979_at 1200 : X43079_at 1300 : X43179_at 1400 : X43279_at 1500
: X43379_f_at
1600 : X43479_at 1700 : X43579_at 1800 : X43679_at 1900 : X43779_at 2000
: X43879_at
2100 : X43979_r_at 2200 : X44079_at 2300 : X44179_at 2400 : X44279_at
2500 : X44379_at
2600 : X44479_at 2700 : X44579_at 2800 : X44679_at 2900 : X44779_at 3000
: X44879_at
3100 : X44979_at 3200 : X45079_at 3300 : X45179_at 3400 : X45279_at 3500
: X45379_at
3600 : X45479_at 3700 : La_X45579_at 3800 : X45679_at 3900 : X45779_at
4000 : X45879_at
4100 : X45979_at 4200 : X46079_at 4300 : La_X46179_f_at 4400 : X46279_at
4500 : X46379_i_at
4600 : X46479_at 4700 : X46579_at 4800 : X46679_at 4900 : X46779_r_at
5000 : X46879_at
5100 : X46979_at 5200 : X47079_at 5300 : X47179_r_at 5400 : X47279_r_at
5500 : X47379_at
5600 : X47479_at 5700 : X47579_at 5800 : X47679_at 5900 : X47779_at 6000
: X47879_at
6100 : X47979_at 6200 : X48079_at 6300 : X48179_at 6400 : X48279_at 6500
: X48379_at
6600 : X48479_at 6700 : X48579_r_at 6800 : X48790_s_at 6900 : X48981_at
7000 : X49161_at
7100 : X49345_at 7200 : X49549_at 7300 : X49727_at 7400 : X49908_at 7500
: X50100_at
7600 : X50266_at 7700 : X50463_at 7800 : X50660_r_at 7900 : X50860_at
8000 : X51020_at
8100 : X51179_at 8200 : X51365_at 8300 : X51567_at 8400 : X51747_at 8500
: X51927_at
8600 : X52116_at 8700 : X52327_s_at 8800 : X52550_s_at 8900 : X52760_at
9000 : X52958_at
9100 : X53176_at 9200 : X53374_at 9300 : X53575_at 9400 : X53781_at 9500
: X53965_at
9600 : X54171_s_at 9700 : X54367_at 9800 : X54557_at 9900 : X54755_at
10000 : X54962_f_at
10100 : X55157_at 10200 : X55359_at 10300 : X55532_at 10400 : X55709_at
10500 : X55902_at
10600 : X56086_at 10700 : X56263_at 10800 : X56441_at 10900 : X56637_at
11000 : X56832_at
11100 : X57023_at 11200 : X57205_at 11300 : X57382_at 11400 : X57590_at
11500 : X57797_at
11600 : X57996_at 11700 : X58195_at 11800 : X58394_g_at 11900 :
X58617_at 12000 : X58812_at
12100 : X58984_at 12200 : X59194_at 12300 : X59378_at 12400 : X59555_at
12500 : X59729_at
12600 : AFFX_LysX_M_at
Final Table
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##top variables
topvar <- c(1:length(varlist)) <= TopVariables
tableRaw <- univarRAW$orderframe[topvar,univariate_columns]
pander::pander(tableRaw)
| X56456_at |
5.76 |
0.515 |
6.36 |
0.494 |
0.999 |
0.804 |
| X46410_at |
3.57 |
0.370 |
4.03 |
0.492 |
0.281 |
0.796 |
| X45793_at |
5.61 |
0.396 |
6.03 |
0.344 |
0.989 |
0.794 |
| X54865_at |
3.64 |
0.513 |
4.26 |
0.559 |
0.521 |
0.792 |
| X51726_at |
3.51 |
0.213 |
3.74 |
0.240 |
0.485 |
0.783 |
| X50361_at |
7.74 |
1.289 |
8.93 |
0.874 |
0.505 |
0.781 |
| X56474_at |
6.22 |
0.760 |
7.06 |
0.769 |
0.858 |
0.780 |
| X54063_at |
4.63 |
0.711 |
5.36 |
0.629 |
0.999 |
0.780 |
| X45178_at |
4.30 |
0.299 |
4.64 |
0.326 |
0.988 |
0.779 |
| X43355_s_at |
6.02 |
0.652 |
6.69 |
0.623 |
0.466 |
0.779 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
pander::pander(finalTable)
| X56456_at |
5.763 |
0.5154 |
6.361 |
0.4938 |
0.9988 |
0.804 |
| X46410_at |
3.566 |
0.3704 |
4.026 |
0.4919 |
0.2809 |
0.796 |
| X45793_at |
5.608 |
0.3965 |
6.028 |
0.3445 |
0.9888 |
0.794 |
| X51726_at |
3.505 |
0.2133 |
3.735 |
0.2397 |
0.4849 |
0.783 |
| X45156_at |
6.554 |
0.3967 |
6.940 |
0.3186 |
0.9029 |
0.779 |
| X45674_at |
3.080 |
0.3720 |
3.452 |
0.3399 |
0.9674 |
0.778 |
| X45340_at |
4.557 |
0.4729 |
4.997 |
0.4290 |
0.8732 |
0.778 |
| X52119_at |
6.167 |
0.4012 |
5.811 |
0.3591 |
0.0399 |
0.771 |
| X53011_at |
4.530 |
0.4330 |
4.932 |
0.3688 |
0.7642 |
0.770 |
| X48506_at |
5.243 |
0.3233 |
5.551 |
0.2887 |
0.3136 |
0.769 |
| La_X46715_at |
0.257 |
0.2128 |
0.373 |
0.1826 |
0.6400 |
0.676 |
| La_X54865_at |
-3.644 |
0.3127 |
-3.422 |
0.3935 |
0.6602 |
0.664 |
| La_X46287_at |
3.222 |
0.2326 |
3.335 |
0.1992 |
0.4845 |
0.661 |
| La_X45802_at |
1.976 |
0.0713 |
2.023 |
0.0804 |
0.9086 |
0.660 |
| La_X48760_s_at |
-0.776 |
0.1906 |
-0.676 |
0.1474 |
0.4877 |
0.656 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
theCharformulas <- attr(dc,"LatentCharFormulas")
topvar <- rownames(tableRaw)
finalTable <- rbind(finalTable,tableRaw[topvar[!(topvar %in% topLAvar)],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- theCharformulas[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
finalTable$varratio <- varratio[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores","varratio")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| X56456_at |
NA |
5.763 |
0.5154 |
6.361 |
0.4938 |
0.9988 |
0.804 |
0.804 |
0 |
1.000 |
| X46410_at |
NA |
3.566 |
0.3704 |
4.026 |
0.4919 |
0.2809 |
0.796 |
0.796 |
0 |
1.000 |
| X45793_at |
NA |
5.608 |
0.3965 |
6.028 |
0.3445 |
0.9888 |
0.794 |
0.794 |
1 |
1.000 |
| X54865_at |
NA |
3.643 |
0.5133 |
4.259 |
0.5589 |
0.5215 |
0.792 |
0.792 |
NA |
NA |
| X51726_at |
NA |
3.505 |
0.2133 |
3.735 |
0.2397 |
0.4849 |
0.783 |
0.783 |
0 |
1.000 |
| X50361_at |
NA |
7.736 |
1.2892 |
8.927 |
0.8741 |
0.5051 |
0.781 |
0.781 |
NA |
NA |
| X56474_at |
NA |
6.220 |
0.7597 |
7.057 |
0.7690 |
0.8581 |
0.780 |
0.780 |
NA |
NA |
| X54063_at |
NA |
4.632 |
0.7106 |
5.357 |
0.6288 |
0.9989 |
0.780 |
0.780 |
NA |
NA |
| X45178_at |
NA |
4.303 |
0.2994 |
4.638 |
0.3256 |
0.9885 |
0.779 |
0.779 |
NA |
NA |
| X43355_s_at |
NA |
6.016 |
0.6523 |
6.694 |
0.6229 |
0.4656 |
0.779 |
0.779 |
NA |
NA |
| X45156_at |
NA |
6.554 |
0.3967 |
6.940 |
0.3186 |
0.9029 |
0.779 |
0.779 |
0 |
1.000 |
| X45674_at |
NA |
3.080 |
0.3720 |
3.452 |
0.3399 |
0.9674 |
0.778 |
0.778 |
0 |
1.000 |
| X45340_at |
NA |
4.557 |
0.4729 |
4.997 |
0.4290 |
0.8732 |
0.778 |
0.778 |
1 |
1.000 |
| X52119_at |
NA |
6.167 |
0.4012 |
5.811 |
0.3591 |
0.0399 |
0.771 |
0.771 |
0 |
1.000 |
| X53011_at |
NA |
4.530 |
0.4330 |
4.932 |
0.3688 |
0.7642 |
0.770 |
0.770 |
0 |
1.000 |
| X48506_at |
NA |
5.243 |
0.3233 |
5.551 |
0.2887 |
0.3136 |
0.769 |
0.769 |
0 |
1.000 |
| La_X46715_at |
+ X46715_at - (0.765)X46739_at |
0.257 |
0.2128 |
0.373 |
0.1826 |
0.6400 |
0.676 |
0.689 |
-1 |
0.321 |
| La_X54865_at |
- (1.250)X44986_s_at + X54865_at |
-3.644 |
0.3127 |
-3.422 |
0.3935 |
0.6602 |
0.664 |
0.792 |
-1 |
0.359 |
| La_X46287_at |
+ X46287_at - (0.478)X52333_f_at |
3.222 |
0.2326 |
3.335 |
0.1992 |
0.4845 |
0.661 |
0.726 |
-1 |
0.320 |
| La_X45802_at |
- (0.754)X44787_s_at + X45802_at |
1.976 |
0.0713 |
2.023 |
0.0804 |
0.9086 |
0.660 |
0.649 |
-1 |
0.249 |
| La_X48760_s_at |
- (1.182)X45500_at + X48760_s_at |
-0.776 |
0.1906 |
-0.676 |
0.1474 |
0.4877 |
0.656 |
0.582 |
-1 |
0.163 |
Comparing ILAA vs
PCA vs EFA
PCA
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE,tol=0.01) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")

EFA
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)-1)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
Effect on CAR
modeling
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}

pander::pander(table(dataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.887 |
0.818 |
0.937 |
| 3 |
se |
0.828 |
0.713 |
0.911 |
| 4 |
sp |
0.950 |
0.861 |
0.990 |
| 6 |
diag.or |
91.545 |
24.205 |
346.226 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe[,c(outcome,varlistcV)],control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="ILAA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}

pander::pander(table(DEdataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.887 |
0.818 |
0.937 |
| 3 |
se |
0.828 |
0.713 |
0.911 |
| 4 |
sp |
0.950 |
0.861 |
0.990 |
| 6 |
diag.or |
91.545 |
24.205 |
346.226 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.831 |
0.753 |
0.892 |
| 3 |
se |
0.891 |
0.788 |
0.955 |
| 4 |
sp |
0.767 |
0.640 |
0.866 |
| 6 |
diag.or |
26.755 |
9.972 |
71.785 |
par(op)
EFA
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}

pander::pander(table(EFAdataframe[,outcome],pr))
pander::pander(ptab$detail[c(5,3,4,6),])
| 5 |
diag.ac |
0.887 |
0.818 |
0.937 |
| 3 |
se |
0.828 |
0.713 |
0.911 |
| 4 |
sp |
0.950 |
0.861 |
0.990 |
| 6 |
diag.or |
91.545 |
24.205 |
346.226 |
par(op)